In [ ]:
# imports
import os
import yaml
import numpy as np
import pandas as pd
import theano
import lasagne
import loading
from autoload_data import *    # pulls in directory names and several views and functions on data
from training import *
from network import *
from experiments import *
import architectures as arches
import autoencoder as ae

# aliases
L = lasagne.layers
T = theano.tensor

Autoencoder transfer learning

Implementation note: the autoencoder object wraps a Theano graph. The graph divides into an encoder, which produces a vector of latent features; a decoder, which inverts the encoder to reconstruct the original input; and a classifier, which produces a probability distribution over the 36 moves (0-35).

The encoder is shared between the encoder+decoder (Autoencoder.autoencoder) and the encoder+classifier (Autoencoder.network). Once the encoder+decoder has been trained (unsupervised), the encoder's weights can be frozen and the encoder+classifier trained (supervised).

You could modify this (typical) scheme; just be aware that the encoder is shared by Autoencoder.autoencoder and Autoencoder.network. See autoencoder.py for an example architecture, network.py for the Autoencoder implementation, and training.py for the AutoencoderTrainer.
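
For orientation, here is a minimal sketch of the shared-encoder layout in plain Lasagne. The layer sizes and names below are made up for illustration; the real architecture lives in autoencoder.py.

In [ ]:
# Toy sketch of the shared-encoder idea (hypothetical sizes, NOT the real net;
# see autoencoder.py)
def toy_shared_encoder():
    inp = L.InputLayer((None, 2, 4, 9))           # two-channel 4x9 board
    enc = L.DenseLayer(inp, num_units=50)         # shared encoder -> latent features
    dec = L.DenseLayer(enc, num_units=2 * 4 * 9)  # decoder: reconstruct flattened input
    cla = L.DenseLayer(enc, num_units=36,
                       nonlinearity=lasagne.nonlinearities.softmax)  # classifier over 36 moves
    # dec plays the role of Autoencoder.autoencoder, cla of Autoencoder.network;
    # both sit on top of the same enc layer
    return dec, cla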


In [ ]:
autoencoder = Autoencoder(ae.autoencoder)                          # initialize autoencoder obj
ae_trainer = AutoencoderTrainer(stopthresh=1, print_interval=1)    # initialize trainer

ae_start_params = L.get_all_param_values(autoencoder.autoencoder)  # cache starting parameters (TODO: move to network obj)
ae_trainer.train_autoencoder(autoencoder, np.concatenate(data[2])) # train autoencoder

ae_end_params = L.get_all_param_values(autoencoder.autoencoder)    # cache trained params
autoencoder.freeze_params(exclude=list(range(-7, 0)))              # freeze all params except the last 7 (classifier head)
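
freeze_params is implemented in network.py. As a rough sketch of the mechanism, assuming Lasagne's standard parameter tags: freezing amounts to removing a parameter's 'trainable' tag, so that updates built from get_all_params(..., trainable=True) skip it.

In [ ]:
# Sketch of freezing via Lasagne's tag system (the real freeze_params lives in
# network.py and may differ). Negative indices in `exclude` keep the last few
# params trainable, e.g. range(-7, 0) spares the classifier head.
def freeze_sketch(net, exclude=()):
    params = L.get_all_params(net)
    keep = {params[i] for i in exclude}
    for layer in L.get_all_layers(net):
        for p, tags in layer.params.items():
            if p not in keep:
                tags.discard('trainable')    # get_all_params(net, trainable=True) now skips p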

In [ ]:
ae_trainer.stopthresh = 5 
ae_trainer.print_interval = 20
net_list = ae_trainer.train_all(autoencoder, data=hvhdata, seed=984227)    # train classifier

Transfer learning training


In [ ]:
names = ['a4', 'deep_c1', 'regular_c1']    # list network handles from arch_specs.yaml
run_tuned_experiment(names)                # and awaaaay we go. see experiments.py for implementation

Bulk supplement training


In [ ]:
names = ['g1', 'g2', 'g4', 'h1', 'h2', 'h4']
run_bulk_experiment(names[:2])

Reinforcement learning

WIP

To Dos:

  • convert histories to reward signals
  • pass reward signals to networks
  • decide training scheme:
    • network plays self
    • network plays round robin with multiple architectures
    • network plays RR with multiple architectures AND heuristic search models

In [ ]:
import scipy.signal as sig

# 2D filters that detect four-in-a-row on a single 4x9 board channel
filters = dict()
filters['h'] = np.ones((1, 4))         # horizontal
filters['v'] = np.ones((4, 1))         # vertical
filters['d'] = np.eye(4)               # diagonal
filters['u'] = np.fliplr(np.eye(4))    # anti-diagonal

class GameState(object):
    """
    Unless otherwise indicated, "state" should be a (1, 2, 4, 9) tensor,
    with first channel representing OWN pieces (NOT black!)
    
    Would it be better to have this as a stateless thing with class methods?
    """
    
    def __init__(self, state):
        self.state = state
        self.color = self.get_color()
        self.terminal = self.terminal_check()
    
    def get_color(self):
        if (self.state.sum() % 2) == 0:
            return 0
        else:
            return 1
    
    def update(self, move):
        new_state = self.state.copy()
        coords = np.unravel_index(move, (4, 9))
        new_state[0, 0, coords[0], coords[1]] = 1      # place piece on OWN channel
        return GameState(new_state[:, ::-1, :, :])     # invert channels: opponent becomes "own"
    
    def terminal_check(self):
        # NB: a full board counts as a draw here even if the final move
        # completed a line; compute_reward below makes the same assumption
        if self.state.sum() == 36:
            return 'draw'
        for fil in filters.values():
            # slide the 2D line filter over each channel's 4x9 board
            for channel in range(2):
                filter_response = sig.convolve(self.state[0, channel], fil, mode='valid')
                if (filter_response >= 4).any():
                    return 'win'
        return False
    
    
class RLTrainer(object):
    """
    Unless otherwise indicated, "gstate" should be a GameState object
    """
    def __init__(self, reward=1000, batchsize=100):
        self.reward = reward
        self.batchsize = batchsize
    
    def choose_move(self, network, gstate):
        # NB: does not yet mask occupied squares (WIP)
        policy = network.output_fn(gstate.state)
        return np.random.choice(36, p=policy.ravel())    # flatten (1, 36) policy for sampling
    
    def play_game(self, network, gstate):
        choices = []
        states = [gstate.state]
        
        next_state = gstate
        while not next_state.terminal:
            action = self.choose_move(network, next_state)
            choices.append(action)
            next_state = next_state.update(action)
            states.append(next_state.state)
            
        return choices, states
    
    def play_batch(self, network):
        initial_state = np.zeros((1, 2, 4, 9))
        choice_history = []
        state_history = []
        
        for i in range(self.batchsize):
            c, s = self.play_game(network, GameState(initial_state.copy()))
            choice_history.append(c)
            state_history.append(s)
            
        return choice_history, state_history
    
    def compute_reward(self, choices):
        if len(choices) == 36:
            return 0
        if (len(choices) % 2) == 1:
            return 1
        else:
            return -1
        
    def convert_data(self, choice_history, state_history):
        """
        To dos here:
        
        - compute outcome of each game (0 for draw, 1 for black win, -1 for white)
        - tile those outcomes but alternating: 
            a game that has 10 moves ended in a win for white, so all EVEN locations
            in vector should be +1 and all ODD locations should be -1
            vice versa for a game with odd moves
        - convert each set of choices and states in respective histories to 
            np arrays (n_games x whatev dims)
            
        - LATER: figure out how to pass array of outcomes to network update function!
        """
        outcomes = [self.compute_reward(choices) for choices in choice_history]
        
        return None
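
A hypothetical sketch of the outcome-tiling to-do in convert_data, under one reading of the docstring's convention (for a white win, outcome -1, even indices get +1 and odd indices get -1; flipped for a black win):

In [ ]:
# Hypothetical helper for convert_data (assumes the docstring's sign
# convention; not settled code): tile a game's outcome with alternating sign.
def tile_outcome(outcome, n_moves):
    signs = -np.ones(n_moves)
    signs[1::2] = 1              # odd indices get the opposite sign
    return outcome * signs

# e.g. tile_outcome(-1, 10) -> [ 1., -1.,  1., -1., ...]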

Fake news!

Model recovery: train on fake data from another model.

Fit best networks on fake data


In [ ]:
run_fake_experiment(['h4'])

Prototyping

Subject tuning

Usually doesn't work (not enough data per subject, even for very simple classifier layers)


In [ ]:
dafiname = os.path.join(datadir, '0 (with groups).csv')
subject_data = [loading.default_loader(dafiname, subject=s) for s in range(40)]
arch = archs[archname]

In [ ]:
print([len(s[0]) for s in subject_data])

In [ ]:
for i in range(5):
    pafiname = '{} {} split agg fit exp 1-4.npz'.format(archname, i)
    prenet = Network(arch)
    prenet.load_params(os.path.join(paramsdir, pafiname))
    params = L.get_all_param_values(prenet.net)
    print('PREFIT {}\n'.format(i))
    
    for s in range(40):
        sdata = subject_data[s]
        num_obs = len(sdata[0])
        bs = num_obs//5
        tuner = FineTuner(stopthresh=10, batchsize=bs)
        print('SUBJECT {}\n'.format(s))
        
        for j in range(5):
            fname = '{} {} agg fit exp 1-4 {} subject {} tune fit exp 0'.format(archname, i, s, j)
            net = tuner.train_all(architecture=arch, data=sdata, split=j, startparams=params, freeze=True)
            net.save_params(os.path.join(paramsdir, fname))

Data aggregation

Doesn't need to be run more than once - move to a script sometime, wouldja?


In [ ]:
datafilenames = ['0 (with groups)', '1 (with computer)', '2 (with computer)', '3 (with computer)', '4']
datafilenames = [os.path.join(datadir, fname + '.csv') for fname in datafilenames]
colnames = ['subject', 'color', 'bp', 'wp', 'zet', 'rt']

e0 = pd.read_csv(datafilenames[0], names=colnames+['splitno'])
e1 = pd.read_csv(datafilenames[1], names=colnames)
e2 = pd.read_csv(datafilenames[2], names=colnames)
e3 = pd.read_csv(datafilenames[3], names=colnames+['task', 'taskorder', 'session'])
e4 = pd.read_csv(datafilenames[4], names=colnames+['timecondition'])
Es = [e1, e2, e3, e4]
for i, e in enumerate(Es[1:]):
    # offset subject IDs by the previous experiment's max human subject ID
    # (computer players, IDs >= 1000, are excluded from the max)
    e['subject'] = e['subject'] + Es[i].loc[Es[i]['subject'] < 1000, 'subject'].max()

A = pd.concat([e[colnames] for e in [e1, e2, e3, e4]])

groups = np.arange(len(A))%5 + 1
np.random.seed(100001)
np.random.shuffle(groups)
A['group'] = groups

A.to_csv(os.path.join(datadir, '1-4.csv'), encoding='ASCII', header=False, index=False)
A.loc[A['subject']<1000, :].to_csv(
    os.path.join(datadir, '1-4 (no computer).csv'), 
    encoding='ASCII', header=False, index=False
)

In [ ]:
# this is for another training scheme, using preassigned groups in both hvh and other data
bulkdata_df = pd.concat([data[0], hvhdata[0]])
bulkdata_df.to_csv(os.path.join(datadir, 'bulk.csv'), index=False, header=False)
bulkdata = loading.default_loader(os.path.join(datadir, 'bulk.csv'))

Add groups to fake data


In [ ]:
fd_ = pd.read_csv(os.path.join(datadir, 'fake news.csv'), names=['subject', 'color', 'bp', 'wp', 'zet', 'rt'])

groups = np.arange(len(fd_)) % 5 + 1
np.random.shuffle(groups)
fd_['group'] = groups
fd_.to_csv(os.path.join(datadir, 'fake news (with groups).csv'), encoding='ASCII', header=False, index=False)